"""
Author : Mr.Sun
Datetime : 2024/12/6 17:14 
FileName : key_match.py
Desc : Classify announcement titles as university recruitment or by a province/city keyword.
"""
import re
import cpca
import dataset
from logger import logger
from data_operate import base_dir


class KeywordMatch:
    """Classify an announcement title.

    A title is first checked against the university-recruitment heuristics;
    if those do not apply, the province/city extracted by ``cpca`` is looked
    up in the enabled keywords stored in the ``key_words`` table.
    """

    def __init__(self):
        """Set up the title heuristics and open the SQLite connection."""
        # Regexes that recognise an institution name inside a title. Each
        # one needs at least one non-whitespace character before the suffix.
        self.university_patterns = [
            r'\S+大学',
            r'\S+学院',
            r'\S+高校',
            r'\S+校区'
        ]

        # Substrings that mark a title as recruitment related.
        self.job_keywords = {
            '招聘', '岗位', '教师', '博士', '人才', '编制',
            '编岗', '引进', '专业技术', '教职工'
        }

        # Substrings that veto a title.
        # NOTE(review): '博士' is also listed in job_keywords; because the
        # exclusion check wins, a title containing it is always rejected.
        self.exclude_keywords = {
            '应届', '选调生', '优秀高校毕业生', '2024届', '2025届',
            '实习', '兼职', '临时','拟录用','博士'
        }

        # Keyword cache; None means the table has not been read yet.
        self.cached_key_words = None

        # Database connection.
        sqlite_url = f'sqlite:///{base_dir}'
        connect_options = {'connect_args': {'check_same_thread': True}}
        self.db = dataset.connect(
            sqlite_url,
            engine_kwargs=connect_options,
            sqlite_wal_mode=False
        )
        self.query = "select key_word from key_words where status=1"
        # Table handle; the methods of this class do not use it.
        self.table = self.db['key_words']

    @staticmethod
    def _title_has_any(title, terms):
        """Return True when at least one of ``terms`` is a substring of ``title``."""
        for term in terms:
            if term in title:
                return True
        return False

    def _get_key_words(self):
        """Return the set of enabled keywords, querying the database only once.

        The first call runs ``self.query`` and stores the ``key_word`` column
        as a set in ``self.cached_key_words``; later calls return that set.
        """
        cached = self.cached_key_words
        if cached is not None:
            return cached

        rows = self.db.query(self.query)
        self.cached_key_words = set(row['key_word'] for row in rows)
        return self.cached_key_words

    def is_university_recruitment(self, title):
        """Decide whether ``title`` looks like a university recruitment notice.

        The title must match one of ``self.university_patterns``, contain one
        of ``self.job_keywords`` and contain none of ``self.exclude_keywords``.

        Returns:
            bool: True only when all three conditions hold.
        """
        # 1. An institution name has to be present.
        names_institution = False
        for name_regex in self.university_patterns:
            if re.search(name_regex, title) is not None:
                names_institution = True
                break
        if not names_institution:
            return False

        # 2. A recruitment keyword has to be present.
        if not self._title_has_any(title, self.job_keywords):
            return False

        # 3. No exclusion keyword may be present.
        return not self._title_has_any(title, self.exclude_keywords)

    def key_word_match(self, title=None) -> str:
        """Return the keyword matched by ``title``.

        Returns:
            str: ``"大学招聘"`` for a university recruitment title; otherwise
            the city, or failing that the province, extracted from the title
            when it is one of the stored keywords; otherwise the literal
            string ``"None"`` (also for an empty or missing title).

        Any exception raised during region extraction or keyword lookup is
        logged and treated as "no match".
        """
        if not title:
            return "None"

        # University recruitment takes priority over region keywords.
        if self.is_university_recruitment(title):
            return "大学招聘"

        try:
            # Extract province / city information with cpca.
            regions = cpca.transform([title])
            province = regions.at[0, '省']
            city = regions.at[0, '市']

            # Enabled keywords from the database (cached after first use).
            known_words = self._get_key_words()

            # The city is preferred over the province.
            for place in (city, province):
                if place in known_words:
                    logger.info(f"{title} 匹配到的关键词是: {place}")
                    return place

        except Exception as e:
            logger.error(f"匹配过程出错: {e}")

        logger.info(f"{title} 未匹配到关键词.")
        return "None"


if __name__ == '__main__':
    # Manual smoke check: classify one sample title and print the outcome.
    sample_title = "2025年天津医科大学口腔医院招聘3人方案(第一批)"
    print(KeywordMatch().key_word_match(sample_title))
